import psycopg2
import gzip
import re


# Keyword arguments passed verbatim to ``psycopg2.connect``.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file that is not checked in.
DB_CONF = {
    'database': 'crawl',
    'user': 'bigdata',
    'password': 'Youka888',
    'host': '192.168.31.102',
    'port': '5432',
}

# Paged query; the two placeholders are bound to (limit, offset) at execute time.
# NOTE(review): there is no ORDER BY, so PostgreSQL does not guarantee that
# successive LIMIT/OFFSET pages are disjoint or complete -- confirm whether a
# stable sort key is available on this table.
SQL = (
    'select good_inf from jingdong_good_inf '
    'limit %s offset %s'
)

# Number of rows requested per page.
LIMIT = 100


def _quoted_value_after(text, marker):
    """Return the text between the last split on ``marker`` and the next ``'``.

    Returns an empty string when ``marker`` does not occur in ``text``.
    Without that guard, splitting on a missing marker yields the whole
    string, and the "value" would be whatever precedes its first quote.
    """
    if marker not in text:
        return ''
    return text.split(marker)[-1].split("'", 1)[0]


def get_wetao_data(conn_dict, out_file):
    """Export brand/shop pairs from the database to a gzip-compressed file.

    Pages through the module-level ``SQL`` query ``LIMIT`` rows at a time,
    pulls the values following the ``'品牌': '`` and ``'店铺': '`` markers out
    of the first column of each row, and writes them as one UTF-8 encoded
    ``brand<TAB>shop`` line per row.

    Args:
        conn_dict: Keyword arguments forwarded to ``psycopg2.connect``.
        out_file: Path of the gzip file to create (opened in ``'wb'`` mode).

    Rows whose first column is NULL are skipped. Lines longer than 50
    characters (including the tab and newline) are printed instead of being
    written.
    """
    conn = psycopg2.connect(**conn_dict)
    try:
        cur = conn.cursor()
        offset = 0

        with gzip.open(out_file, 'wb') as f:
            while True:
                cur.execute(SQL, (LIMIT, offset))
                rows = cur.fetchall()
                if not rows:
                    break
                offset += LIMIT

                for row in rows:
                    info = row[0]
                    if info is None:
                        # A NULL column has nothing to parse; skip it rather
                        # than abort the whole export.
                        continue

                    brand = _quoted_value_after(info, "'品牌': '")
                    shop = _quoted_value_after(info, "'店铺': '")
                    content = '\t'.join([brand, shop]) + '\n'
                    if len(content) > 50:
                        # Over-long lines are reported on stdout and not
                        # written to the output file.
                        print(content)
                        continue

                    f.write(content.encode('UTF-8'))
    finally:
        # Release the connection even if a query, the parsing or the file
        # write raises.
        conn.close()


if __name__ == '__main__':
    # Script entry point: dump the brand/shop pairs to a fixed archive path
    # using the module-level connection settings.
    archive_path = '/home/alpha/tmp/jd_good_brands.gz'
    get_wetao_data(conn_dict=DB_CONF, out_file=archive_path)
